## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

The following plots are exploratory analysis and trial visualizations to include in the dashboard. The aesthetics and interactive capabilities are still in need of being improved. In addition, the margins of error

Financial

Median Houshold Income

barchart for 2018

ggplotly(ggplot(filter(acs_counties, year == 2018), aes(x = NAME, y = median_household_income,
                                                        text = paste0("Region: ", NAME,
                                                                      "<br>Year: ", year,
                                                                      "<br>Median Household Income: $", median_household_income,
                                                                      "<br>Margin of Error: $", median_household_income_moe)))+
  geom_col(fill = "dark blue")+
  geom_errorbar(aes(x = NAME, ymin = median_household_income - median_household_income_moe, 
                    ymax = median_household_income + median_household_income_moe), color = "dark orange") + 
  geom_point(color = "dark orange", size = 3)+ theme_minimal()+theme(axis.text.x = element_text(angle=45)) +
    ggtitle("Median Household Income") + ylab("Median Household Income") + xlab("Region"), tooltip="text")

Try a line chart with all years present

# grouped line chart for all years: each geography is its own color
p <- ggplot(acs_counties, aes(x=year, y=median_household_income, group = NAME, color = NAME,
                              text = paste0("Region: ", NAME,
                                            "<br>Year: ", year,
                                            "<br>Median Household Income: $", median_household_income,
                                            "<br>Margin of Error: $", median_household_income_moe))) +
  geom_line() + 
  geom_point() +
  scale_colour_manual(values = viridis_pal(option = "D")(15)) +
  #geom_pointrange(aes(ymin=median_household_income - median_household_income_moe, ymax=median_household_income + median_household_income_moe)) +
  theme_minimal() + ggtitle("Median Household Income 2015-2018") + ylab("Median Household Income") + xlab("Year")
#Note: Wasco and south wasco are from ACS5 year estimates. Moving averages.
ggplotly(p, tooltip = "text") %>% config(displayModeBar = "static", displaylogo = FALSE, 
                       modeBarButtonsToRemove=list("zoom2d","select2d","lasso2d",
                                                   "hoverClosestCartesian", "hoverCompareCartesian","resetScale2d"))

Easier to see trends, but lots of colors make it quite busy. Only drawback to the line graphs as opposed to the bar charts is that margin of error cannot be seen visually. But the tooltip provided by plotly is great for keeping that data.

Same line chart but colors are only kept for south wasco, wasco county, and the state geography.

p <- ggplot(acs_counties %>% mutate(south_wasco = fct_other(NAME, keep = c("South Wasco County School District 1, Oregon", "Wasco County, Oregon", "Oregon"), other_level = "Neighboring Counties"))
            , aes(x=year, y=median_household_income, group = NAME, color = south_wasco,
                  text = paste0("Region: ", NAME,
                                "<br>Year: ", year,
                                "<br>Median Household Income: $", median_household_income,
                                "<br>Margin of Error: $", median_household_income_moe))) +
  geom_line(size = 1.5) + 
  geom_point(size = 2) +
  scale_colour_manual(name = "Region", values = c(viridis(3, option = "D"), graypal)) +
  scale_alpha_manual(values=c(1,1,1,0.1)) +
  theme_minimal() + ggtitle("Median Household Income 2015-2018") + ylab("Median Household Income") + xlab("Year") 
#Note: Wasco and south wasco are from ACS5 year estimates. Moving averages.
ggplotly(p, tooltip = "text") %>% config(displayModeBar = "static", displaylogo = FALSE, 
                                         modeBarButtonsToRemove=list("zoom2d","select2d","lasso2d",
                                                                     "hoverClosestCartesian", "hoverCompareCartesian","resetScale2d"))
#n

Much cleaner looking plot! It's obvious where south wasco ranks in relation to the rest of the counties and geographies. We see that South wasco has the lowest median houshold income, but is very similar to that of Sherman county. Generally, the state of Oregon is seeing a steading increase in median household income, however, south wasco's growth pattern has just started increasing from 2016 to now.

Household income bracket distribution

Rather than just looking at median income, we can see where the rest of the community is distributed in household income.

#grouped bar charts
income <- select(filter(acs_counties, year == 2018), NAME, contains("income"))
income <- income %>% select(!contains("moe"), -median_household_income)
income <- melt(income, id.vars = "NAME", measure.vars = colnames(income)[-1])
ggplotly(ggplot(income)+
  geom_col(aes(x = NAME, y = value, fill = variable), position = "dodge")+ 
    scale_fill_manual(values = viridis(10, option = "D")) +
  # scale_fill_discrete(name = "Income Bracket", labels = c("Less than 10,000", "10,000-14,999", "15,000-24,999",
  #                                                         "25,000-34,999", "35,000-49,999", "50,000-74,999", 
  #                                                         "75,000-99,999","100,000-149,999", "150,000-199,999", "above 200,000")) +
  ylab("% of Population") + xlab("Region") +
  ggtitle("Income Distribution for 2018") + coord_flip())

The plot is quite dense, but we can see how the distributions of income compare across the different counties and geographies. South Wasco has close to 50% of their population of households earning between 35,000 and 74,999 dollars. But they do have some of the highest percentages of households in the lowest income bracket along with skamania, klickitat and jefferson county.

Trying to visualize with a stacked bar chart

#stacked bar charts
income <- acs_counties %>% select(NAME, year, contains("income"))
income_perc <- income %>% select(!contains("moe"), -median_household_income, NAME, year)
income_moe <- income %>% select(NAME, year, contains("moe"), -median_household_income_moe)
income_perc <- melt(income_perc, id.vars = c("NAME", "year"), measure.vars = colnames(income_perc)[-c(1,2)])
income_moe <- income_moe %>% melt(id.vars = c("NAME","year"), measure.vars = colnames(income_moe)[-c(1,2)]) %>%
  rename(moe = value)  %>% mutate(variable =gsub("_moe", "", variable))
income_table <- merge(x = income_perc, y = income_moe, by=c("NAME", "variable", "year"))

ggplotly(ggplot(filter(income_table, year ==2018))+
           geom_bar(aes(fill=variable, y=value, x=NAME), position = position_stack(reverse = TRUE), stat="identity")+ 
           scale_fill_manual(name ="Income Bracket",values = viridis(10, option = "D")) +
           # scale_fill_discrete(name = "Income Bracket", labels = c("Less than 10,000", "10,000-14,999", "15,000-24,999",
           #                                                         "25,000-34,999", "35,000-49,999", "50,000-74,999", 
           #                                                         "75,000-99,999","100,000-149,999", "150,000-199,999", "above 200,000")) +
           ylab("% of Population") + xlab("Region") +
           scale_colour_manual(values = viridis_pal(option = "D")(10)) +
           ggtitle("Income Distribution for 2018") + coord_flip()) %>% 
  config(displayModeBar = "static", displaylogo = FALSE, 
         modeBarButtonsToRemove=list("zoom2d","select2d","lasso2d","hoverClosestCartesian",
                                     "hoverCompareCartesian","resetScale2d"), tooltip = "text") 

This stacked bar chart condenses the busy plot of distributions from the previous grouped bar. It is easier to see where the heavier densities are at the income extremes.

Poverty Rate

Line chart for federal Poverty rates in 2018

ggplotly(ggplot(filter(acs_counties, year == 2018), aes(x = NAME, y = below_poverty,
                                                        text = paste0("Region: ", NAME,
                                                                      "<br>Year: ", year,
                                                                      "<br>Percent Below Federal Poverty: ", below_poverty, "%",
                                                                      "<br>Margin of Error: ", below_poverty_moe, "%"))) +
           geom_col(fill = "dark blue") +
           geom_errorbar(aes(x = NAME, ymin = below_poverty - below_poverty_moe, 
                             ymax = below_poverty + below_poverty_moe), color = "dark orange") + 
           geom_point(color = "dark orange", size = 3) + theme_minimal() + theme(axis.text.x = element_text(angle=30)) +
           xlab("Region") + ylab("% Below Poverty") + ggtitle("% of Population Below Federal Poverty Line"), tooltip = "text") %>% 
  config(displayModeBar = "static", displaylogo = FALSE,
         modeBarButtonsToRemove=list("zoom2d","select2d","lasso2d","hoverClosestCartesian", "hoverCompareCartesian","resetScale2d"))
p <- ggplot(acs_counties %>% mutate(south_wasco = fct_other(NAME, keep = c("South Wasco County School District 1, Oregon", "Wasco County, Oregon", "Oregon"), other_level = "Neighboring Counties"))
            , aes(x=year, y=below_poverty, group = NAME, color = south_wasco,
                  text = paste0("Region: ", NAME,
                                "<br>Year: ", year,
                                "<br>Percent Below Federal Poverty: ", below_poverty, "%",
                                "<br>Margin of Error: ", below_poverty_moe, "%"))) +
  geom_line(size = 1.5) + 
  geom_point(size = 2) +
  scale_colour_manual(name = "Region", values = c(viridis(3, option = "D"), graypal)) +
  scale_alpha_manual(values=c(1,1,1,0.1)) +
  theme_minimal() + ggtitle("Percent Below Federal Poverty: 2015-2018") + ylab("Percent Below Federal Poverty") + xlab("Year") 
#Note: Wasco and south wasco are from ACS5 year estimates. Moving averages.
ggplotly(p, tooltip = "text") %>% config(displayModeBar = "static", displaylogo = FALSE, 
                                         modeBarButtonsToRemove=list("zoom2d","select2d","lasso2d",
                                                                     "hoverClosestCartesian", "hoverCompareCartesian","resetScale2d"))
#n
ggplot() +
  geom_sf(data = filter(acs_tracts, year == 2018), aes(fill = below_poverty)) +
  geom_sf(fill = "transparent", color = "gray20", size = 1, 
          data = acs_tracts %>% group_by(COUNTYFP) %>% summarise()) + theme_minimal() +
  labs(title = paste("Percent of population below poverty by census track in", 2018, sep=" "))

Employment

static bar chart

# bar graphs
ggplot(filter(acs_counties, year == 2018), aes(x = NAME, y = employment_20_to_64)) +
  geom_col(fill = "dark blue")+
  geom_errorbar(aes(x = NAME, ymin = employment_20_to_64 - employment_20_to_64_moe, 
                    ymax = employment_20_to_64 + employment_20_to_64_moe), color = "dark orange") + 
  theme_minimal() + theme(axis.text.x = element_text(angle=30)) +
  geom_point(color = "dark orange", size = 3) + ggtitle("% of Adults (20-64) with Employment Status")

p <- ggplot(acs_counties %>% mutate(south_wasco = fct_other(NAME, keep = c("South Wasco County School District 1, Oregon", "Wasco County, Oregon", "Oregon"), other_level = "Neighboring Counties"))
            , aes(x=year, y=employment_20_to_64, group = NAME, color = south_wasco,
                  text = paste0("Region: ", NAME,
                                "<br>Year: ", year,
                                "<br>Percent Employed: ", employment_20_to_64, "%",
                                "<br>Margin of Error: ", employment_20_to_64_moe, "%"))) +
  geom_line(size = 1.5) + 
  geom_point(size = 2) +
  scale_colour_manual(name = "Region", values = c(viridis(3, option = "D"), graypal)) +
  scale_alpha_manual(values=c(1,1,1,0.1)) +
  theme_minimal() + ggtitle("Employment Ratio for Adults 20 to 64: 2015-2018") + ylab("Employment Ratio") + xlab("Year") 
#Note: Wasco and south wasco are from ACS5 year estimates. Moving averages.
ggplotly(p, tooltip = "text") %>% config(displayModeBar = "static", displaylogo = FALSE, 
                                         modeBarButtonsToRemove=list("zoom2d","select2d","lasso2d",
                                                                     "hoverClosestCartesian", "hoverCompareCartesian","resetScale2d"))
#n

static map

# sf map
ggplot() +
  geom_sf(data = filter(acs_tracts, year == 2018), aes(fill = employment_20_to_64)) +
  labs(title = "Percent of employed adults adults 20 to 64 by census track") #+ 

Labor participation rate

p <- ggplot(acs_counties %>% mutate(south_wasco = fct_other(NAME, keep = c("South Wasco County School District 1, Oregon", "Wasco County, Oregon", "Oregon"), other_level = "Neighboring Counties"))
            , aes(x=year, y=labor_force_20_to_64, group = NAME, color = south_wasco,
                  text = paste0("Region: ", NAME,
                                "<br>Year: ", year,
                                "<br>Labor Force Participation Rate: ", labor_force_20_to_64, "%",
                                "<br>Margin of Error: ", labor_force_20_to_64_moe, "%"))) +
  geom_line(size = 1.5) + 
  geom_point(size = 2) +
  scale_colour_manual(name = "Region", values = c(viridis(3, option = "D"), graypal)) +
  scale_alpha_manual(values=c(1,1,1,0.1)) +
  theme_minimal() + ggtitle("Labor Force Participation Rate for Adults 20 to 64: 2015-2018") + ylab("Labor Force Participation Rate") + xlab("Year") 
#Note: Wasco and south wasco are from ACS5 year estimates. Moving averages.
ggplotly(p, tooltip = "text") %>% config(displayModeBar = "static", displaylogo = FALSE, 
                                         modeBarButtonsToRemove=list("zoom2d","select2d","lasso2d",
                                                                     "hoverClosestCartesian", "hoverCompareCartesian","resetScale2d"))
#n

Housing

p <- ggplot(acs_counties %>% mutate(south_wasco = fct_other(NAME, keep = c("South Wasco County School District 1, Oregon", "Wasco County, Oregon", "Oregon"), 
                                                            other_level = "Neighboring Counties"))
            , aes(x=year, y=affordable_housing_all_perc, group = NAME, color = south_wasco,
                  text = paste0("Region: ", NAME,
                                "<br>Year: ", year,
                                "<br>Affordable Housing: ", round(affordable_housing_all_perc, digits = 1), "%"))) +
  geom_line(size = 1.5) + 
  geom_point(size = 2) +
  scale_colour_manual(name = "Region", values = c(viridis(3, option = "D"), graypal))  +
  theme_minimal() + ggtitle("Affordable Housing 2015-2018") + ylab("Affordable Housing") + xlab("Year")
#Note: Wasco and south wasco are from ACS5 year estimates. Moving averages.
ggplotly(p, tooltip = "text") %>% config(displayModeBar = "static", displaylogo = FALSE, 
                                         modeBarButtonsToRemove=list("zoom2d","select2d","lasso2d",
                                                                     "hoverClosestCartesian", "hoverCompareCartesian","resetScale2d"))

The line graph looks very busy, but an adjustment to the line type and transparency should help. Generally, we can see that in 2018, South Wasco has one of the highest percenteges of affordable housing amongst occupied housing units.

housing <- select(acs_counties, NAME, year, contains("affordable_housing"))
housing_rent_own_perc <- housing %>% select(NAME, year, affordable_housing_own_perc, affordable_housing_rent_perc)
housing_rent_own_moe <- housing %>% select(NAME, year, affordable_housing_own_perc_moe, affordable_housing_rent_perc_moe) 
housing_rent_own_perc <- melt(housing_rent_own_perc, id.vars = c("NAME", "year"),measure.vars = colnames(housing_rent_own_perc)[-c(1,2)])
housing_rent_own_moe <- melt(housing_rent_own_moe, id.vars = c("NAME", "year"),measure.vars = colnames(housing_rent_own_moe)[-c(1,2)]) %>%
  rename(moe = value)  %>% mutate(variable =gsub("_moe", "", variable))
housing_rent_own_table <- merge(x = housing_rent_own_perc, y = housing_rent_own_moe, by=c("NAME", "variable", "year"))
#grouped bar chart for own and rent occupancy
ggplotly(ggplot(filter(housing_rent_own_table, year == 2018),aes(x = NAME, y = value, fill = variable), 
                text = paste0("Region: ", NAME,
                              "<br>Year: ", year,
                              "<br>Affordable Housing: ", round(value, digits = 1), "%")) +
           geom_col(position = "dodge") + 
           scale_fill_discrete(name = "Housing Ownership", labels = c("Own", "Rent")) +
           #theme_minimal() + theme(axis.text.x = element_text(angle=30)) + 
           ylab("% of Occupied housing units") + xlab("Region") + coord_flip() + theme_minimal() +
           ggtitle("Affordable Housing 2015-2018", subtitle = "Occupied households where monthly costs are less than 30% of houshold income"), tooltip = "text")

Of the occupied housing units, South Wasco's community ranks highly in being able to afford their monthly housing costs given their houshold income.

#divergent bar chart to split up own and rent occupancy
housing_diverge <- housing_rent_own_table %>% mutate(value = as.numeric(ifelse(variable == "affordable_housing_own_perc",
                                                     value, -1*value)))
ggplotly(ggplot(filter(housing_diverge, year == 2018),
                aes(x = NAME, y = value, fill = variable,
                    text = paste0("Region: ", NAME,
                                  "<br>Year: ", 2018,
                                  "<br>Affordable Housing: ", round(abs(value), digits = 1), "%")))+
           geom_bar(stat = "identity") + 
           scale_y_continuous(breaks = pretty(housing_diverge$value), labels = abs(pretty(housing_diverge$value))) +
           scale_colour_manual(name = "Housing Ownership", values = viridis(2, option="D")) +
           scale_fill_discrete(name = "Housing Ownership", labels = c("Own", "Rent")) +
           theme_minimal() + labs(x="Region",y="% of Occupied Housing Units") +
           coord_flip(), tooltip = "text") %>% layout(title = list(text = paste0("Affordable Housing 2015-2018",
                                           '<br>','<sup>',
                                           "% of occupied households where monthly costs are less than 30% of houshold income",
                                            '</sup>'))) 

An alternative way to visualize the percentage of occupied housing units whose monthly housing costs are less than 30% of household income

% of affordable housing by household income bracket

housing <- select(filter(acs_counties, year == 2018), NAME, contains("affordable_housing"))
housing_by_income <- housing %>% select(NAME, !contains("perc") & !contains("total"))
housing_by_income <- melt(housing_by_income, id.vars = "NAME", measure.vars = colnames(housing_by_income)[-c(1,4)])
#grouped bar chart for own and rent occupancy
ggplotly(ggplot(housing_by_income, aes(x = NAME, y = value, fill = variable), 
                text = paste0("Region: ", NAME,
                              "<br>Year: ", year,
                              "<br>Affordable Housing: ", round(value, digits = 1), "%")) +
           geom_col(position = "dodge") + 
           #scale_fill_discrete(name = "Housing Ownership", labels = c("Own", "Rent")) +
           #theme_minimal() + theme(axis.text.x = element_text(angle=30)) + 
           ylab("% of Occupied housing units") + xlab("Region") + coord_flip() +
           ggtitle("Affordable Housing 2015-2018", subtitle = "Occupied households where monthly costs are less than 30% of houshold income"), tooltip = "text")

Social

Racial Diversity

#----------Racial Diversity---------------
#convert wide to long (split up moe and remerge....)
race <- acs_counties_neighbors %>% select(GEOID,NAME, year, contains("race")) # select appropriate variables
race_moe <- race %>% select(NAME,year, contains("moe")) #separate moe estimates
race_moe <- race_moe %>% melt(id.vars = c("NAME","year"), measure.vars = colnames(race_moe)[-c(1,2)]) %>%
  rename(moe = value)  %>% mutate(variable =gsub("_moe", "", variable))
race <- race %>% select(!contains("moe"), NAME, year)
race <- melt(race, id.vars = c("NAME", "year"),measure.vars = colnames(race)[-c(1,2)])
race_table <- merge(x = race, y = race_moe, by=c("NAME", "variable", "year"))

#plot all races onto one large set of grouped bars for every county.
ggplotly(ggplot(filter(race_table, year == 2018)) +
  geom_col(aes(x = NAME, y = value, fill = variable), position = "dodge")+ 
  #geom_errorbar(aes(x = as.factor(NAME), ymin=value-moe, ymax=value+moe), width=.2,position="dodge") +
  scale_fill_manual(values = viridis(8, option="D"),name="Groups",labels = c("White", "Black or African American", "American Indian or Alaskan Native",
                                 "Asian", "Native Hawaiian or Pacific Islander", "Hispanic or Latino", 
                                 "Two or More","Other")) +
  #theme_minimal() + theme(axis.text.x = element_text(angle=45)) + 
  ylab("% of Population") + xlab("Region") + coord_flip() + theme_minimal() +
  ggtitle("% Racial and Ethnic Diversity"))

This plot is not yet interactive, and needs some work on ordering of the grouped bars. But generally, south wasco, and its surrounding neighbors are majority a white puplation. The second most populus racial group are Hispanic or Latino group.

Family Stability

customizable tool tip and legend not working

family <- select(filter(acs_counties_neighbors), NAME, year,contains("family"))
family_perc <- family %>% select(NAME, year, family_married_parent_perc, family_single_parent_perc, 
                                 family_children_nonfamily_perc, family_nonfamily_household_perc,
                                 family_nonfamily_household_perc)
family_moe <- family %>% select(NAME, year, family_married_parent_perc_moe, family_single_parent_perc_moe,
                               family_children_nonfamily_perc_moe, family_nonfamily_household_perc_moe,
                               family_nonfamily_household_perc_moe)
family_moe <- melt(family_moe, id.vars = c("NAME","year"), measure.vars = colnames(family_moe)[-c(1,2)]) %>% 
  rename("moe" ="value") %>% mutate(variable =gsub("_moe", "", variable))
family_perc <- melt(family_perc, id.vars = c("NAME","year"), measure.vars = colnames(family_perc)[-c(1,2)])
family_table <- merge(x = family_perc, y = family_moe, by=c("NAME", "variable", "year"))
#grouped bar chart for family type
ggplotly(ggplot(filter(family_table, year == 2018), aes(x = NAME, y = value, fill = variable), 
                text = paste0("Region: ", NAME,
                              "<br>Year: ", year,
                              "<br>% Children: ", round(value, digits = 1), "%")) +
           geom_col(position = "dodge") + 
           scale_fill_manual(values = viridis(4, option="D"), name="Family Type")  +
           ylab("% of children") + xlab("Region") + coord_flip()+ theme_minimal() +
           ggtitle("Family Structure for Children under 18-2018"))

Compared to its immediate neighbors, South Wasco has the highest percentage of children who are in a household with no family members. South Wasco also has the highest percentage of people living alone or with non family members.

Educational Attainment

grouped bar charts, legend won't adjust :(

ed <- select(filter(acs_counties_neighbors), NAME, year, contains("education"))
ed_perc <- ed %>% select(NAME, year,education_less_hs, education_hs_grad, education_assoc_some_college, education_bachelors_or_higher)
ed_moe <- ed %>% select(NAME, year, education_less_hs_moe, education_hs_grad_moe, 
                        education_assoc_some_college_moe, education_bachelors_or_higher_moe)
ed_moe <- melt(ed_moe, id.vars = c("NAME", "year"), measure.vars = colnames(ed_moe)[-c(1,2)]) %>% 
  rename("moe" ="value") %>% mutate(variable =gsub("_moe", "", variable))
ed_perc <- melt(ed_perc, id.vars = c("NAME", "year"), measure.vars = colnames(ed_perc)[-c(1,2)])
ed_table <- merge(x = ed_perc, y = ed_moe, by=c("NAME", "variable", "year"))
#grouped bar chart for own and rent occupancy
ggplotly(ggplot(filter(ed_table, year == 2018)) +
           geom_col(aes(x = NAME, y = value, fill = variable), position = "dodge") +
           scale_fill_manual(values = viridis(4, option = "D"),
                             name = "Educational Attainment",
                             breaks = c("education_less_hs","education_hs_grad","education_assoc_some_college", "education_bachelors_or_higher"),
                             labels = c("Less than High School", "High School Graduate or Equivalent (GED)","Associates Degree or Some College", "Bachelors or Higher")) +
           #theme_minimal() + theme(axis.text.x = element_text(angle=30)) + 
           ylab("% of Adults 25 and Older") + xlab("Region") + 
           coord_flip()+ theme_minimal() +
           ggtitle("Educational Attainment for Adults 25 and older-2018"))

Generally, South Wasco has a large percentage of adults that are high school graduates and have some higher education. However, It has some of the lowest percentages of adults that havea bachelors degree or higher. Hood river has the highest educated population.